library(dada2)
packageVersion("dada2") # check dada2 version
## [1] '1.22.0'
library(Biostrings)
library(ShortRead)
library(seqTools) # per base sequence content
library(phyloseq)
library(ggplot2)
library(data.table)
library(plyr)
library(dplyr)
library(qckitfastq) # per base sequence content
library(stringr)
# ROOT DIRECTORY (to modify on your computer)
path.root <- "~/Projects/MetaIBS"
path.hugerth <- file.path(path.root, "scripts/analysis-individual/Hugerth-2019")
path.data <- file.path(path.root, "data/analysis-individual/Hugerth-2019")
First, we import the fastq files containing the raw reads. The samples were downloaded from the ENA database with the accession number PRJEB31817.
# Save the path to the directory containing the fastq zipped files
path.fastq <- file.path(path.data, "raw_fastq")
# list.files(path.fastq) # check we are in the right directory
# fastq filenames have format: SAMPLENAME.fastq.gz
# Saves the whole directory path to each file name
fnFs <- sort(list.files(path.fastq, pattern="_1.fastq.gz", full.names = TRUE)) # forward
fnRs <- sort(list.files(path.fastq, pattern="_2.fastq.gz", full.names = TRUE)) # reverse
show(fnFs[1:5])
show(fnRs[1:5])
#length(fnFs) # should be 607
# Extract sample names, assuming filenames have format: SAMPLENAME.fastq.gz
sample.names <- sapply(strsplit(basename(fnFs), "_1.fastq.gz"), `[`, 1)
show(sample.names[1:5]) # saves only the file name (without the path)
# Look at quality of all files
for (i in 1:3){ # 1:length(fnFs)
show(plotQualityProfile(fnFs[i]))
show(plotQualityProfile(fnRs[i]))
}
# Look at nb of reads per sample
# raw_stats <- data.frame('sample' = sample.names,
# 'reads' = fastqq(fnFs)@nReads)
# min(raw_stats$reads)
# max(raw_stats$reads)
# mean(raw_stats$reads)
We will have a quick peak at the per base sequence content of the reads in some samples, to make sure there is no anomaly (i.e. all reads having the same sequence).
# Look at per base sequence content (forward read)
fseqF <- seqTools::fastqq(fnFs[1])
## [fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Hugerth-2019/raw_fastq/ERR3548557_1.fastq.gz' done.
rcF <- read_content(fseqF)
plot_read_content(rcF) + labs(title = "Per base sequence content - Forward read")
# Look at per base sequence content (reverse read)
fseqR <- seqTools::fastqq(fnRs[1])
## [fastqq] File ( 1/1) '/Users/enigma/Projects/MetaIBS/data/analysis-individual/Hugerth-2019/raw_fastq/ERR3548557_2.fastq.gz' done.
rcR <- read_content(fseqR)
plot_read_content(rcR) + labs(title = "Per base sequence content - Reverse read")
Now, we will look whether the reads still contain the primers. Primer sequences could be found in the SRARunTable (metadata table from SRA).
# V3-V4
FWD <- "CCTACGGGNGGCWGCAG" # 341F primer sequence
REV <- "GACTACHVGGGTATCTAATCC" # 805R primer sequence
# Function that, from the primer sequence, will return all combinations possible (complement, reverse complement, ...)
allOrients <- function(primer) {
# Create all orientations of the input sequence
require(Biostrings)
dna <- DNAString(primer) # The Biostrings works w/ DNAString objects rather than character vectors
orients <- c(Forward = dna, Complement = complement(dna), Reverse = reverse(dna),
RevComp = reverseComplement(dna)) # get all orientations
return(sapply(orients, toString)) # Convert back to character vector
}
# Get all combinations of the primer sequences
FWD.orients <- allOrients(FWD) # 341F
REV.orients <- allOrients(REV) # 805R
FWD.orients # sanity check
## Forward Complement Reverse RevComp
## "CCTACGGGNGGCWGCAG" "GGATGCCCNCCGWCGTC" "GACGWCGGNGGGCATCC" "CTGCWGCCNCCCGTAGG"
REV.orients
## Forward Complement Reverse
## "GACTACHVGGGTATCTAATCC" "CTGATGDBCCCATAGATTAGG" "CCTAATCTATGGGVHCATCAG"
## RevComp
## "GGATTAGATACCCBDGTAGTC"
# Function that counts number of reads in which a sequence is found
primerHits <- function(primer, fn) {
nhits <- vcountPattern(primer, sread(readFastq(fn)), fixed = FALSE, max.mismatch = 2)
return(sum(nhits > 0))
}
# Look in all samples how many times the 341F and 805R primers are found in the reads of each sample
for (i in 1:5){
cat("SAMPLE", sample.names[i], "with total number of", raw_stats[i,'reads'], "reads\n\n")
x <- rbind(ForwardRead.FWDPrimer = sapply(FWD.orients, primerHits, fn = fnFs[[i]]),
ForwardRead.REVPrimer = sapply(REV.orients, primerHits, fn = fnFs[[i]]),
ReverseRead.FWDPrimer = sapply(FWD.orients, primerHits, fn = fnRs[[i]]),
ReverseRead.REVPrimer = sapply(REV.orients, primerHits, fn = fnRs[[i]]))
print(x)
cat("\n____________________________________________\n\n")
}
## SAMPLE ERR3548557 with total number of 33358 reads
##
## Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer 33096 0 0 0
## ForwardRead.REVPrimer 0 0 0 4
## ReverseRead.FWDPrimer 16 16 15 25
## ReverseRead.REVPrimer 32943 10 8 7
##
## ____________________________________________
##
## SAMPLE ERR3548558 with total number of 33983 reads
##
## Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer 33738 0 0 0
## ForwardRead.REVPrimer 0 0 0 16
## ReverseRead.FWDPrimer 15 15 15 37
## ReverseRead.REVPrimer 33663 4 8 1
##
## ____________________________________________
##
## SAMPLE ERR3548559 with total number of 23973 reads
##
## Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer 23799 0 0 0
## ForwardRead.REVPrimer 0 0 0 602
## ReverseRead.FWDPrimer 5 5 5 610
## ReverseRead.REVPrimer 23742 1 0 0
##
## ____________________________________________
##
## SAMPLE ERR3548560 with total number of 22510 reads
##
## Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer 22321 0 0 0
## ForwardRead.REVPrimer 0 0 0 1019
## ReverseRead.FWDPrimer 18 19 17 1047
## ReverseRead.REVPrimer 22273 5 7 2
##
## ____________________________________________
##
## SAMPLE ERR3548563 with total number of 24172 reads
##
## Forward Complement Reverse RevComp
## ForwardRead.FWDPrimer 24025 0 0 0
## ForwardRead.REVPrimer 0 0 0 10
## ReverseRead.FWDPrimer 7 6 6 19
## ReverseRead.REVPrimer 23852 3 4 2
##
## ____________________________________________
Let’s have a quick look at where primers are positioned in the forward/reverse reads
# Function that gets position in which sequence is found
primerHitsPosition <- function(primer, fn, primertype){
hits <- as.data.frame(vmatchPattern(primer, sread(readFastq(fn)), fixed = FALSE, max.mismatch = 2))
hits <- hits[,c("group", "start")]
colnames(hits) <- c("sample", "start")
if(nrow(hits) != 0){
hits$sample <- sapply(strsplit(basename(fn), "_"), `[`, 1)
hits$readslength <- seqTools::fastqq(fn)@maxSeqLen
hits <- hits %>% group_by(sample, start) %>% count() %>% ungroup() %>% mutate(primer=primertype)
return(hits)
}
}
# Get position of primers in forward reads
FWDpos <- data.frame()
for(i in 1:length(fnFs)){
cat("SAMPLE", i)
newF <- NULL
newR <- NULL
newF <- primerHitsPosition(FWD.orients["Forward"], fnFs[[i]], primertype="FWDprimer")
newR <- primerHitsPosition(REV.orients["RevComp"], fnFs[[i]], primertype="REVprimer")
FWDpos <- rbind(newF, newR, FWDpos)
}
# Get position of REV primers
REVpos <- data.frame()
for(i in 1:length(fnRs)){
cat("SAMPLE", i)
newF <- NULL
newR <- NULL
newF <- primerHitsPosition(FWD.orients["RevComp"], fnRs[[i]], primertype="FWDprimer")
newR <- primerHitsPosition(REV.orients["Forward"], fnRs[[i]], primertype="REVprimer")
REVpos <- rbind(newF, newR, REVpos)
}
ggplot(FWDpos, aes(x=start, y=freq))+
geom_point() +
facet_wrap(~primer)+
xlim(c(0,max(FWDpos$readslength)))+
labs(x="start position of primer", y="# reads with primers starting at x position", title="FORWARD READS")
ggplot(REVpos, aes(x=start, y=freq))+
geom_point() +
facet_wrap(~primer)+
xlim(c(0,max(REVpos$readslength)))+
labs(x="start position of primer", y="# reads with primers starting at x position", title="REVERSE READS")
The reads indeed contain the forward primer in the forward reads, and the reverse primer in the reverse reads. We will keep only reads containing the primer, and then remove the primer! We won’t do any filtering on the revcomp primers though (revcomp REVprimer in forward reads; FWDprimer in reverse reads), as they’re present only in a minority of reads (and not all samples either).
# KEEP READS WITH PRIMER AND REMOVE PRIMER+BARCODE
# Place filtered files in a filtered1/ subdirectory
FWD.filt1_samples <- file.path(path.data, "filtered1", paste0(sample.names, "_F_filt1.fastq.gz")) # FWD reads
REV.filt1_samples <- file.path(path.data, "filtered1", paste0(sample.names, "_R_filt1.fastq.gz")) # REV reads
# Assign names for the filtered fastq.gz files
names(FWD.filt1_samples) <- sample.names
names(REV.filt1_samples) <- sample.names
# Keep only reads with primers & remove primers
FWD.out1 <- removePrimers(fn = fnFs, fout = FWD.filt1_samples,
primer.fwd = FWD.orients[["Forward"]],
trim.fwd = TRUE,
orient = FALSE, # re-orient reads if needed
compress = TRUE, verbose = TRUE)
REV.out1 <- removePrimers(fn = fnRs, fout = REV.filt1_samples,
primer.fwd = REV.orients[["Forward"]],
trim.fwd = TRUE,
orient = FALSE, # re-orient reads if needed
compress = TRUE, verbose = TRUE)
Then, we perform a quality filtering of the reads.
# Place filtered files in a filtered/ subdirectory
FWD.filt2_samples <- file.path(path.data, "filtered2", paste0(sample.names, "_F_filt2.fastq.gz")) # FWD reads
REV.filt2_samples <- file.path(path.data, "filtered2", paste0(sample.names, "_R_filt2.fastq.gz")) # REV reads
# Assign names for the filtered fastq.gz files
names(FWD.filt2_samples) <- sample.names
names(REV.filt2_samples) <- sample.names
# Create matrix that will count the reads input & reads output (after quality filter)
out2 <- NULL
out2 <- matrix(0L, nrow=length(607), ncol=2)
colnames(out2) <- c("reads.in", "reads.out")
# Quality filter
for (i in 1:607){
# Follow computation
if(i%%10 == 0){print(i)}
tryCatch({
out <- NULL # initialize
out <- filterAndTrim(fwd = FWD.filt1_samples[i], filt = FWD.filt2_samples[i],
rev = REV.filt1_samples[i], filt.rev = REV.filt2_samples[i],
maxEE=3, # reads with more than 3 expected errors (sum(10e(-Q/10))) are discarded
truncQ=10, # Truncate reads at the first instance of a quality score less than or equal to truncQ.
minLen = 150, # Discard reads shorter than 150 bp. This is done after trimming and truncation.
matchIDs = TRUE, # match forward & reverse reads (discard reads that don't have their pair)
id.sep="/", # to identify matched pairs (if samples downloaded from ENA)
multithread = TRUE, compress=TRUE, verbose=TRUE)
out2 <- rbind(out2, out)
#print(out2)
#print("-------")
#cat(rownames(out))
}, error=function(e){cat("ERROR :", conditionMessage(e), "\n")})
}
# Remove first row that only has 0
out2 <- out2[-1,]
# 3 files had an error, which ones?
setdiff(sample.names, sub("_.*", "", rownames(out2)))
# Output: ERR3586005, ERR3586042, ERR3586312
# When looking at the header of the gzip files, they have "@1" written, and not
# "@HWI-M03284:58:000000000-B4YML:1:1101:20603:1834" that allow to match forward & reverse reads
# Let's save the file paths of the 604 samples that passed the filter
fnF.qc <- sort(list.files(file.path(path.data, "filtered2"), pattern="_F_filt2.fastq.gz", full.names = TRUE)) # forward
fnR.qc <- sort(list.files(file.path(path.data, "filtered2"), pattern="_R_filt2.fastq.gz", full.names = TRUE)) # reverse
Let’s look at the output filtered fastq files as sanity check.
out2[1:4,] # show how many reads were filtered in each file
## reads.in reads.out
## ERR3548557_F_filt1.fastq.gz 33096 25953
## ERR3548558_F_filt1.fastq.gz 33738 25991
## ERR3548559_F_filt1.fastq.gz 23799 17695
## ERR3548560_F_filt1.fastq.gz 22321 16417
# Look at quality profile of all filtered files
for (i in 1:3){
show(plotQualityProfile(FWD.filt2_samples[i]))
show(plotQualityProfile(REV.filt2_samples[i]))
}
Now we will build the parametric error model, to be able to infer amplicon sequence variants (ASVs) later on.
set.seed(123)
errF <- learnErrors(fnF.qc, multithread=TRUE, randomize = TRUE, verbose = 1)
set.seed(123)
errR <- learnErrors(fnR.qc, multithread=TRUE, randomize = TRUE, verbose = 1)
The error rates for each possible transition (A→C, A→G, …) are shown. Points are the observed error rates for each consensus quality score. The black line shows the estimated error rates after convergence of the machine-learning algorithm. The red line shows the error rates expected under the nominal definition of the Q-score. Here the estimated error rates (black line) are a good fit to the observed rates (points), and the error rates drop with increased quality as expected.
plotErrors(errF, nominalQ = TRUE) # Forward reads
plotErrors(errR, nominalQ = TRUE) # Reverse reads
The dada() algorithm infers sequence variants based on estimated errors (previous step). Firstly, we de-replicate the reads in each sample, to reduce the computation time. De-replication is a common step in almost all modern ASV inference (or OTU picking) pipelines, but a unique feature of derepFastq is that it maintains a summary of the quality information for each dereplicated sequence in $quals.
# Dereplicate the reads in the sample
derepF <- derepFastq(fnF.qc) # forward
derepR <- derepFastq(fnR.qc) # reverse
# Infer sequence variants
dadaFs <- dada(derepF, err=errF, multithread=TRUE) # forward
dadaRs <- dada(derepR, err=errR, multithread=TRUE) # reverse
# Inspect the infered sequence variants from sample 1:2
for (i in 1:2){
print(dadaFs[[i]])
print(dadaRs[[i]])
print("________________")
}
## dada-class: object describing DADA2 denoising results
## 133 sequence variants were inferred from 6594 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## dada-class: object describing DADA2 denoising results
## 63 sequence variants were inferred from 7589 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## [1] "________________"
## dada-class: object describing DADA2 denoising results
## 256 sequence variants were inferred from 9312 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## dada-class: object describing DADA2 denoising results
## 141 sequence variants were inferred from 10747 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
## [1] "________________"
mergers <- mergePairs(dadaFs, derepF, dadaRs, derepR, verbose=TRUE)
head(mergers[[1]])
We can now construct an amplicon sequence variant table (ASV) table, a higher-resolution version of the OTU table produced by traditional methods.
# Make sequence table from the infered sequence variants
seqtable <- makeSequenceTable(mergers)
# We should have 604 samples (604 rows)
dim(seqtable)
## [1] 604 16735
# Inspect distribution of sequence lengths
hist(nchar(getSequences(seqtable)), breaks = 100, xlab = "ASV length", ylab = "Number of ASVs", main="")
# Sequences should be between 341F - 805R, so around 427bp (removing the primers lengths).
# Considering that we kept only reads containing the primers, and merged the reads, we shouldn't have sequences below ~400bp
# Remove any sequence variant outside 380-480bp
seqtable.new <- seqtable[,nchar(colnames(seqtable)) %in% 380:480]
dim(seqtable.new)
## [1] 604 16478
hist(nchar(getSequences(seqtable.new)), breaks = 100, xlab = "ASV length", ylab = "Number of ASVs", main="")
# check we haven't lost too many counts
sum(seqtable.new)/sum(seqtable)
## [1] 0.9992934
The core dada method corrects substitution and indel errors, but chimeras remain. Fortunately, the accuracy of sequence variants after denoising makes identifying chimeric ASVs simpler than when dealing with fuzzy OTUs. Chimeric sequences are identified if they can be exactly reconstructed by combining a left-segment and a right-segment from two more abundant “parent” sequences.
seqtable.nochim <- removeBimeraDenovo(seqtable.new, method="consensus", multithread=TRUE, verbose=TRUE)
# Remove _1_filt.fastq.gz from rownames
# rownames(seqtable.nochim)[1:5]
rownames(seqtable.nochim) <- sub("_.*", "", rownames(seqtable.nochim))
# Check how many sequence variants we have after removing chimeras
dim(seqtable.nochim)
## [1] 604 5902
# Check how many reads we have after removing chimeras (we should keep the vast majority of the reads, like > 80%)
sum(seqtable.nochim)/sum(seqtable.new)
## [1] 0.9726593
Sanity check before assigning taxonomy.
# Function that counts nb of reads
getN <- function(x) sum(getUniques(x))
# Table that will count number of reads for each process of interest (input reads, filtered reads, denoised reads, non chimera reads)
track <- cbind(FWD.out1[!rownames(FWD.out1) %in% grep("ERR3586005|ERR3586042|ERR3586312", rownames(FWD.out1), value=T),],
data.frame("primerfiltR"=REV.out1[!rownames(REV.out1) %in% grep("ERR3586005|ERR3586042|ERR3586312", rownames(REV.out1), value=T),2]),
data.frame("qualityfilt"=out2[,2]),
sapply(dadaFs, getN),
sapply(dadaRs, getN),
sapply(mergers, getN),
rowSums(seqtable.nochim),
as.integer(rowSums(seqtable.nochim)*100/FWD.out1[!rownames(FWD.out1) %in% grep("ERR3586005|ERR3586042|ERR3586312", rownames(FWD.out1), value=T),1]))
# Assign column and row names
colnames(track) <- c("input", "primerfiltF", "primerfiltR", "quality-filt", "denoisedF", "denoisedR", 'merged', 'nonchim', "%input->output")
rownames(track) <- rownames(seqtable.nochim)
# Show final table: for each row/sample, we have shown the initial number of reads, filtered reads, denoised reads, and non chimera reads
track
Extensions: The dada2 package also implements a method to make species level assignments based on exact matching between ASVs and sequenced reference strains. Recent analysis suggests that exact matching (or 100% identity) is the only appropriate way to assign species to 16S gene fragments. Currently, species-assignment training fastas are available for the Silva and RDP 16S databases. To follow the optional species addition step, download the silva_species_assignment_v132.fa.gz file, and place it in the directory with the fastq files.
path.silva <- file.path(path.root, "data/analysis-individual/CLUSTER/taxonomy/silva-taxonomic-ref")
# Assign taxonomy (with silva v138)
taxa <- assignTaxonomy(seqtable.nochim, file.path(path.silva, "silva_nr99_v138.1_train_set.fa.gz"),
tryRC = TRUE, # try reverse complement of the sequences
multithread=TRUE, verbose = TRUE)
# Add species assignment
taxa <- addSpecies(taxa, file.path(path.silva, "silva_species_assignment_v138.1.fa.gz"))
# Check how the taxonomy table looks like
taxa.print <- taxa
rownames(taxa.print) <- NULL # Removing sequence rownames for display only
head(taxa.print)
## Kingdom Phylum Class Order
## [1,] "Bacteria" "Firmicutes" "Clostridia" "Lachnospirales"
## [2,] "Bacteria" "Firmicutes" "Clostridia" "Oscillospirales"
## [3,] "Bacteria" "Firmicutes" "Clostridia" "Oscillospirales"
## [4,] "Bacteria" "Verrucomicrobiota" "Verrucomicrobiae" "Verrucomicrobiales"
## [5,] "Bacteria" "Proteobacteria" "Gammaproteobacteria" "Enterobacterales"
## [6,] "Bacteria" "Firmicutes" "Clostridia" "Oscillospirales"
## Family Genus Species
## [1,] "Lachnospiraceae" "Blautia" NA
## [2,] "Ruminococcaceae" "Faecalibacterium" "prausnitzii"
## [3,] "Ruminococcaceae" "Faecalibacterium" "prausnitzii"
## [4,] "Akkermansiaceae" "Akkermansia" "muciniphila"
## [5,] "Enterobacteriaceae" "Escherichia-Shigella" NA
## [6,] "Ruminococcaceae" "Subdoligranulum" NA
table(taxa.print[,1], useNA="ifany") # Show the different kingdoms (should be only bacteria/archaea)
##
## Archaea Bacteria
## 6 5896
table(taxa.print[,2], useNA="ifany") # Show the different Phyla
##
## Actinobacteriota Bacteroidota Campylobacterota Cyanobacteria
## 272 1461 2 54
## Deinococcota Desulfobacterota Elusimicrobiota Euryarchaeota
## 1 77 3 4
## Firmicutes Fusobacteriota Halanaerobiaeota Patescibacteria
## 3625 12 2 3
## Proteobacteria Spirochaetota Synergistota Thermoplasmatota
## 290 2 19 2
## Verrucomicrobiota WPS-2 <NA>
## 60 1 12
We will remove any sample with less than 500 reads from further analysis, and also any ASVs with unassigned phyla.
The preprocessing will be easier to do with ASV, taxonomic and metadata tables combined in a phyloseq object.
#________________________
# Import metadata
metadata_table <- read.csv(file.path(path.data, "00_Metadata-Hugerth/Metadata-Hugerth.csv"), row.names=1)
rownames(metadata_table) <- metadata_table$Run
# head(metadata_table)
#_________________________
# Create phyloseq object
physeq <- phyloseq(otu_table(seqtable.nochim, taxa_are_rows=FALSE),
sample_data(metadata_table[metadata_table$Run %in% rownames(seqtable.nochim),]),
tax_table(taxa))
# Remove taxa that are eukaryota, or have unassigned Phyla
physeq <- subset_taxa(physeq, Kingdom != "Eukaryota")
physeq <- subset_taxa(physeq, !is.na(Phylum))
# Remove samples with less than 500 reads
physeq <- prune_samples(sample_sums(physeq)>=500, physeq) # 79 samples discarded
# Some taxa might have been present only in these low-count samples, so we will make sure to remove taxa that are absent in all samples
physeq <- prune_taxa(taxa_sums(physeq)>0, physeq)
# Absolute abundance
# plot_bar(physeq, fill = "Phylum")+ facet_wrap("host_disease", scales="free_x") + theme(axis.text.x = element_blank())
# Relative abundance for Phylum
phylum.table <- physeq %>%
tax_glom(taxrank = "Phylum") %>% # agglomerate at phylum level
transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
psmelt() # Melt to long format
ggplot(phylum.table, aes(x = reorder(Sample, Sample, function(x) mean(phylum.table[Sample == x & Phylum == 'Bacteroidota', 'Abundance'])),
y = Abundance, fill = Phylum))+
facet_wrap(~ host_disease, scales = "free") + # scales = "free" removes empty lines
geom_bar(stat = "identity") +
theme(axis.text.x = element_blank())+
guides(fill=guide_legend(ncol=2))+
labs(x = "Samples", y = "Relative abundance")
# Save to disk
saveRDS(raw_stats, file.path(path.data, "01_Dada2-Hugerth/raw_stats.rds"))
saveRDS(FWDpos, file.path(path.data, "01_Dada2-Hugerth/forwardreads_primerposition.rds"))
saveRDS(REVpos, file.path(path.data, "01_Dada2-Hugerth/reversereads_primerposition.rds"))
saveRDS(FWD.out1, file.path(path.data, "01_Dada2-Hugerth/FWD_out1.rds"))
saveRDS(REV.out1, file.path(path.data, "01_Dada2-Hugerth/REV_out1.rds"))
saveRDS(out2, file.path(path.data, "01_Dada2-Hugerth/out2.rds"))
saveRDS(errF, file.path(path.data, "01_Dada2-Hugerth/errF.rds"))
saveRDS(errR, file.path(path.data, "01_Dada2-Hugerth/errR.rds"))
saveRDS(dadaFs, file.path(path.data, "01_Dada2-Hugerth/infered_seq_F.rds"))
saveRDS(dadaRs, file.path(path.data, "01_Dada2-Hugerth/infered_seq_R.rds"))
saveRDS(mergers, file.path(path.data, "01_Dada2-Hugerth/mergers.rds"))
saveRDS(track, file.path(path.data, "01_Dada2-Hugerth/track.rds"))
# Taxa & Phyloseq object
saveRDS(taxa, file.path(path.data, "01_Dada2-Hugerth/taxa_hugerth.rds"))
saveRDS(physeq, file.path(path.root, "data/analysis-individual/CLUSTER/phylotree/input/physeq_hugerth.rds"))
saveRDS(physeq, file.path(path.root, "data/phyloseq-objects/phyloseq-without-phylotree/physeq_hugerth.rds"))
sessionInfo()
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] stringr_1.5.0 qckitfastq_1.10.0
## [3] dplyr_1.1.1 plyr_1.8.8
## [5] data.table_1.14.8 ggplot2_3.4.2
## [7] phyloseq_1.38.0 seqTools_1.28.0
## [9] zlibbioc_1.40.0 ShortRead_1.52.0
## [11] GenomicAlignments_1.30.0 SummarizedExperiment_1.24.0
## [13] Biobase_2.54.0 MatrixGenerics_1.6.0
## [15] matrixStats_0.63.0 Rsamtools_2.10.0
## [17] GenomicRanges_1.46.1 BiocParallel_1.28.3
## [19] Biostrings_2.62.0 GenomeInfoDb_1.30.1
## [21] XVector_0.34.0 IRanges_2.28.0
## [23] S4Vectors_0.32.4 BiocGenerics_0.40.0
## [25] dada2_1.22.0 Rcpp_1.0.10
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-162 bitops_1.0-7 RColorBrewer_1.1-3
## [4] tools_4.1.3 bslib_0.4.2 utf8_1.2.3
## [7] R6_2.5.1 vegan_2.6-4 mgcv_1.8-42
## [10] DBI_1.1.3 colorspace_2.1-0 permute_0.9-7
## [13] rhdf5filters_1.6.0 ade4_1.7-22 withr_2.5.0
## [16] tidyselect_1.2.0 compiler_4.1.3 cli_3.6.1
## [19] DelayedArray_0.20.0 labeling_0.4.2 sass_0.4.5
## [22] scales_1.2.1 RSeqAn_1.14.0 digest_0.6.31
## [25] rmarkdown_2.21 jpeg_0.1-10 pkgconfig_2.0.3
## [28] htmltools_0.5.5 highr_0.10 fastmap_1.1.1
## [31] rlang_1.1.0 rstudioapi_0.14 farver_2.1.1
## [34] jquerylib_0.1.4 generics_0.1.3 hwriter_1.3.2.1
## [37] jsonlite_1.8.4 RCurl_1.98-1.12 magrittr_2.0.3
## [40] GenomeInfoDbData_1.2.7 biomformat_1.22.0 interp_1.1-4
## [43] Matrix_1.5-1 munsell_0.5.0 Rhdf5lib_1.16.0
## [46] fansi_1.0.4 ape_5.7-1 lifecycle_1.0.3
## [49] stringi_1.7.12 yaml_2.3.7 MASS_7.3-58.3
## [52] rhdf5_2.38.1 grid_4.1.3 parallel_4.1.3
## [55] crayon_1.5.2 deldir_1.0-6 lattice_0.20-45
## [58] splines_4.1.3 multtest_2.50.0 knitr_1.42
## [61] pillar_1.9.0 igraph_1.4.2 reshape2_1.4.4
## [64] codetools_0.2-19 glue_1.6.2 evaluate_0.20
## [67] latticeExtra_0.6-30 RcppParallel_5.1.7 png_0.1-8
## [70] vctrs_0.6.1 foreach_1.5.2 gtable_0.3.3
## [73] cachem_1.0.7 xfun_0.38 survival_3.5-5
## [76] tibble_3.2.1 iterators_1.0.14 cluster_2.1.4